import os
import re
import urllib.parse as up
import urllib.request as ur

import lxml.etree as le

# CSDN blog-search URL template; {urlquery} is replaced below with the
# urlencoded page-number/keyword query string (p=..., q=...).
url = 'https://so.csdn.net/so/search/s.do?{urlquery}&t=blog&viparticle=&domain=&o=&s=&u=&l=&f=&rbg=0'


# Fetch the page content for a given url
def getResponse(url):
    """Fetch *url* and return the raw response body as bytes.

    Sends a browser-like User-Agent plus a (hard-coded) CSDN session
    cookie so the request is treated as a logged-in browser visit.

    NOTE(review): the cookie below is a captured session token — it will
    expire and should ideally come from configuration, not source code.
    """
    request = ur.Request(
        url=url,
        headers={
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36',
            'cookie': 'TY_SESSION_ID=8bd46190-bcf4-46b1-81d3-791990ff7317; JSESSIONID=E62C7C40954EC3B02974AEB2E236DAC9; uuid_tt_dd=10_17860783900-1587175054135-571117; dc_session_id=10_1587175054135.215036; __gads=ID=a03b0145950bbed4:T=1587175055:S=ALNI_MYPKmpOVEGJcCr0Qpf0NtFPQQPE2A; dc_sid=7539cc14c2ac6be17b911ac63cab3d34; c_first_ref=default; c_first_page=https%3A//edu.csdn.net/; Hm_lvt_6bcd52f51e9b3dce32bec4a3997715ac=1587175068,1589079966; c-toolbar-writeguide=1; c_ref=https%3A//blog.csdn.net/; SESSION=8f36f20a-07a8-4c7d-8197-53c74a1e3a2b; UserName=wangzhiwei0721; UserInfo=c6df5ebc328b48dbbeeca693c9208c79; UserToken=c6df5ebc328b48dbbeeca693c9208c79; UserNick=%E7%8E%8B%E5%BF%97%E5%A8%81%E4%B8%B6; AU=C07; UN=wangzhiwei0721; BT=1589085437998; p_uid=U100000; Hm_up_6bcd52f51e9b3dce32bec4a3997715ac=%7B%22islogin%22%3A%7B%22value%22%3A%221%22%2C%22scope%22%3A1%7D%2C%22isonline%22%3A%7B%22value%22%3A%221%22%2C%22scope%22%3A1%7D%2C%22isvip%22%3A%7B%22value%22%3A%221%22%2C%22scope%22%3A1%7D%7D; Hm_ct_6bcd52f51e9b3dce32bec4a3997715ac=6525*1*10_17860783900-1587175054135-571117!5744*1*wangzhiwei0721; announcement=%257B%2522isLogin%2522%253Atrue%252C%2522announcementUrl%2522%253A%2522https%253A%252F%252Fblog.csdn.net%252Fblogdevteam%252Farticle%252Fdetails%252F105203745%2522%252C%2522announcementCount%2522%253A0%252C%2522announcementExpire%2522%253A3600000%257D; searchHistoryArray=%255B%2522%25E5%25A4%259A%25E7%25BA%25BF%25E7%25A8%258B%2522%252C%2522%25E5%25A4%259A%25E7%258E%25B0%25E5%259C%25BA%2522%252C%2522Python%2522%252C%2522Java%2522%255D; dc_tos=qa3nah; Hm_lpvt_6bcd52f51e9b3dce32bec4a3997715ac=1589086889'
        }
    )
    # Use a context manager so the underlying socket is closed even if
    # .read() raises (the original leaked the response object).
    with ur.urlopen(request) as response:
        return response.read()


# Entry point
if __name__ == '__main__':

    # Interactive parameters: search keyword and an inclusive page range.
    keyWord = input('请输入搜索关键词:')
    page_Star = int(input('请输入开始页码:'))
    page_End = int(input('请输入结束页码:'))
    print(keyWord)

    # Create the output directory up front; without it every open()
    # below would raise FileNotFoundError.
    os.makedirs('blog', exist_ok=True)

    # Fetch each page of search results for the keyword.
    for page in range(page_Star, page_End + 1):
        dataurl = {
            'p': page,
            'q': keyWord,
        }
        response = getResponse(
            url=url.format(
                urlquery=up.urlencode(dataurl)))
        Html_x = le.HTML(response)
        # Links to the individual blog posts in the result list.
        hrefs = Html_x.xpath("//div[@class='search-list-con']/dl//span[@class='mr16']/../../dt//a/@href")
        for href in hrefs:
            response_blog = getResponse(
                url=href,
            )
            # Some linked pages are not standard articles and lack the
            # expected <h1 class="title-article">; skip them instead of
            # crashing on an empty xpath result (original raised IndexError).
            titles = le.HTML(response_blog).xpath("//h1[@class='title-article']/text()")
            if not titles:
                continue
            # Strip characters that are illegal in (Windows) file names.
            title = re.sub(
                r'[/\\:*"<>|?]', '', titles[0]
            )
            # Save the raw HTML of the post under its sanitized title.
            filepath = 'blog/%s.html' % title
            with open(filepath, 'wb') as f:
                f.write(response_blog)
                print(title)
